------------------------------------------------------------------------------------------------------------------------------------
      name:  plog_16
       log:  /accounts/projects/jr_ra/GRscarring/erratum/programs/prepare/statepop.log
  log type:  text
 opened on:  27 Nov 2024, 13:52:52

. ***************************************************************************************************************
. 
. * statepop.do
. * Makes panel of state population
. *
. * Source data:
. *  1) http://www.nber.org/data/census-intercensal-county-population.html 
. *     (through 2009)
. *  2) https://www2.census.gov/programs-surveys/popest/tables/2010-2019/state/totals/nst-est2019-01.xlsx
. *      (saved locally as rawdata/nst-est2018-01.csv; years 2010-2018 are used below)
. *  (Old: 3) Linear extrapolation to end of series)
. *
. *Updates:
. * JR, 4/30/18: Add 2017 data -- no need to extrapolate.
. * NR, 9/12/18: Fixed code so that DC population data correctly populates
. * JR, 4/14/20: Update through 2019 data
.       
. cap project, doinfo

. if _rc==0 {
.    local pdir "`r(pdir)'"                                                       // the project's main dir.
.    local dofile "`r(dofile)'"                                                   // do-file's stub name
.    local sig {bind:{hi:[`dofile'.dta. RP : `dofile'.do, `c(current_date)']}}    // a signature in notes
.    local doasproject=1
. }

. else {
.    local pdir "~/GRscarring"
.    local dofile "statepop"
.    local doasproject=0
. }

. 
. set more off

. local rootdir "`pdir'"

. local thisdir "`pdir'"

. 
. local prepdata "`pdir'/scratch"

. local rawdata "`pdir'/rawdata"

. local output "`pdir'/results"

. 
. ********************************************
. ********************************************
. ** 0.2 LOAD STATE/COUNTY POPULATION DATA ***
. ********************************************
. 
. * 1970-2014 (following code from cleanstatebudget.do, by Audrey Tiew, from LRS 2016 project):
. 
. ******************************************************************************
. * Filling in state population counts based on NBER data                      *
. * http://www.nber.org/data/census-intercensal-county-population.html         *
. ******************************************************************************
. 
. * Population counts based on counties
. if `doasproject'==1 project, original("`rawdata'/county_population.dta")
project GRscar_erratum > do-file uses original: "/scratch/public/jr_ra/GRscarring2024/erratum/rawdata/county_population.dta" filesig
> (3104007002:1961022)

. use "`rawdata'/county_population.dta", clear

. 
. * Using only intercensal estimates (and the same version of each)
. drop pop19904 pop20104 base20104

. 
. * Fix DC:
. sort state_fips county_fips

. bysort state_fips: gen id = _n

. forvalues year=1970/2009 {
  2.         bysort state_fips: replace pop`year' = pop`year'[1] if state_fips==11
  3. }
(1 real change made)
(2 real changes made)
(2 real changes made)
(2 real changes made)
(2 real changes made)
(2 real changes made)
(2 real changes made)
(2 real changes made)
(1 real change made)
(2 real changes made)
(1 real change made)
(1 real change made)
(1 real change made)
(1 real change made)
(1 real change made)
(1 real change made)
(1 real change made)
(1 real change made)
(1 real change made)
(1 real change made)
(1 real change made)
(1 real change made)
(1 real change made)
(1 real change made)
(1 real change made)
(1 real change made)
(1 real change made)
(1 real change made)
(1 real change made)
(1 real change made)
(1 real change made)
(1 real change made)
(1 real change made)
(1 real change made)
(1 real change made)
(1 real change made)
(1 real change made)
(1 real change made)
(1 real change made)
(1 real change made)

. 
. * Drop id
. drop id

. 
. * Drop state averages (don't have for every year but do for counties)
. drop if state_name==county_name & areaname==state_name
(53 observations deleted)

. 
. * Trimming strings
. foreach var of varlist fips state_name county_name fipsst {
  2.         replace `var'=trim(`var')
  3. }
(0 real changes made)
(3,142 real changes made)
(0 real changes made)
(0 real changes made)

. 
. 
. *Code below confirms that each of the 2 observations for the same county has population for different years
. *bysort fips: gen count=_n
. *tab count
. *drop state_fips county_fips areaname state_name county_name fipsst fipsco region division base20104
. *reshape long pop, i(fips count) j(year)
. *reshape wide pop, i(fips year) j(count)
. *count if pop1!=. & pop2!=.
. 
. collapse (sum) pop* (firstnm) state_name, by(fipsst)

. 
. *checking ratios between years
. gen max_ratio_yr = pop1971/pop1970

. forvalues yr=1972/2014 {
  2.         local yrbf=`yr'-1
  3.         replace max_ratio_yr = pop`yr'/pop`yrbf' if pop`yr'/pop`yrbf'>max_ratio_yr
  4. }
(4 real changes made)
(0 real changes made)
(2 real changes made)
(4 real changes made)
(2 real changes made)
(3 real changes made)
(4 real changes made)
(3 real changes made)
(0 real changes made)
(5 real changes made)
(3 real changes made)
(1 real change made)
(1 real change made)
(1 real change made)
(1 real change made)
(0 real changes made)
(1 real change made)
(1 real change made)
(1 real change made)
(1 real change made)
(4 real changes made)
(1 real change made)
(0 real changes made)
(0 real changes made)
(0 real changes made)
(0 real changes made)
(0 real changes made)
(0 real changes made)
(38 real changes made)
(0 real changes made)
(0 real changes made)
(0 real changes made)
(0 real changes made)
(0 real changes made)
(0 real changes made)
(1 real change made)
(0 real changes made)
(0 real changes made)
(2 real changes made)
(0 real changes made)
(0 real changes made)
(0 real changes made)
(0 real changes made)

. 
. sum max_ratio_yr

    Variable |        Obs        Mean    Std. dev.       Min        Max
-------------+---------------------------------------------------------
max_ratio_yr |         51    1.039676    .0215126   1.009522   1.115494

. *the largest year-to-year change is about 11.5% (max ratio 1.115 in the summary above)
. drop max_ratio_yr

. 
. reshape long pop, i(fipsst state_name) j(year)
(j = 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 19
> 95 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014)

Data                               Wide   ->   Long
-----------------------------------------------------------------------------
Number of observations               51   ->   2,295       
Number of variables                  47   ->   4           
j variable (45 values)                    ->   year
xij variables:
            pop1970 pop1971 ... pop2014   ->   pop
-----------------------------------------------------------------------------

. 
. sort year fipsst

. replace state_name=strupper(state_name)
(2,295 real changes made)

. rename state_name state

. 
. tempfile state_pop

. save `state_pop'
file /tmp/St2868349.000004 saved as .dta format

. 
. 
. 
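. *Replace 2010-2018 data with the most recent estimates
. *Originally downloaded from https://www.census.gov/data/datasets/time-series/demo/popest/2010s-state-total.html
. *on 4/2/2019
. * Update: From https://www2.census.gov/programs-surveys/popest/tables/2010-2019/state/totals/nst-est2019-01.xlsx
. *on 4/14/2020, then saved as csv (still under the file name nst-est2018-01.csv).
. * NOTE(review): the rename loop below maps only v4-v12 to pop2010-pop2018, so a 2019 column, if present
. * in the csv, is never renamed to pop2019 and 2019 is absent from the panel -- confirm whether this is intended.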
. if `doasproject'==1 project, original("`rawdata'/nst-est2018-01.csv")
project GRscar_erratum > do-file uses original: "/scratch/public/jr_ra/GRscarring2024/erratum/rawdata/nst-est2018-01.csv" filesig(12
> 29315048:10279)

. 
. import delimited using "`rawdata'/nst-est2018-01.csv", clear varnames(4)
(13 vars, 63 obs)

. drop census estimatesbase

. forvalues i=4/12 {
  2.         local j=`i'+2006
  3.         ren v`i' pop`j' 
  4. }

. ren v1 state_name

. replace state_name=subinstr(state_name,".","",1)
(54 real changes made)

. tab state_name

                             state_name |      Freq.     Percent        Cum.
----------------------------------------+-----------------------------------
                                Alabama |          1        1.61        1.61
                                 Alaska |          1        1.61        3.23
                                Arizona |          1        1.61        4.84
                               Arkansas |          1        1.61        6.45
                             California |          1        1.61        8.06
                               Colorado |          1        1.61        9.68
                            Connecticut |          1        1.61       11.29
                               Delaware |          1        1.61       12.90
                   District of Columbia |          1        1.61       14.52
                                Florida |          1        1.61       16.13
                                Georgia |          1        1.61       17.74
                                 Hawaii |          1        1.61       19.35
                                  Idaho |          1        1.61       20.97
                               Illinois |          1        1.61       22.58
                                Indiana |          1        1.61       24.19
                                   Iowa |          1        1.61       25.81
                                 Kansas |          1        1.61       27.42
                               Kentucky |          1        1.61       29.03
                              Louisiana |          1        1.61       30.65
                                  Maine |          1        1.61       32.26
                               Maryland |          1        1.61       33.87
                          Massachusetts |          1        1.61       35.48
                               Michigan |          1        1.61       37.10
                                Midwest |          1        1.61       38.71
                              Minnesota |          1        1.61       40.32
                            Mississippi |          1        1.61       41.94
                               Missouri |          1        1.61       43.55
                                Montana |          1        1.61       45.16
                               Nebraska |          1        1.61       46.77
                                 Nevada |          1        1.61       48.39
                          New Hampshire |          1        1.61       50.00
                             New Jersey |          1        1.61       51.61
                             New Mexico |          1        1.61       53.23
                               New York |          1        1.61       54.84
                         North Carolina |          1        1.61       56.45
                           North Dakota |          1        1.61       58.06
                              Northeast |          1        1.61       59.68
Note: The estimates are based on the .. |          1        1.61       61.29
                                   Ohio |          1        1.61       62.90
                               Oklahoma |          1        1.61       64.52
                                 Oregon |          1        1.61       66.13
                           Pennsylvania |          1        1.61       67.74
                            Puerto Rico |          1        1.61       69.35
            Release Date: December 2019 |          1        1.61       70.97
                           Rhode Island |          1        1.61       72.58
Source: US. Census Bureau, Population.. |          1        1.61       74.19
                                  South |          1        1.61       75.81
                         South Carolina |          1        1.61       77.42
                           South Dakota |          1        1.61       79.03
                    Suggested Citation: |          1        1.61       80.65
Table 1 Annual Estimates of the Resid.. |          1        1.61       82.26
                              Tennessee |          1        1.61       83.87
                                  Texas |          1        1.61       85.48
                          United States |          1        1.61       87.10
                                   Utah |          1        1.61       88.71
                                Vermont |          1        1.61       90.32
                               Virginia |          1        1.61       91.94
                             Washington |          1        1.61       93.55
                                   West |          1        1.61       95.16
                          West Virginia |          1        1.61       96.77
                              Wisconsin |          1        1.61       98.39
                                Wyoming |          1        1.61      100.00
----------------------------------------+-----------------------------------
                                  Total |         62      100.00

. drop if state_name=="Midwest"
(1 observation deleted)

. drop if state_name=="Northeast"
(1 observation deleted)

. drop if state_name=="United States"
(1 observation deleted)

. drop if state_name=="South"
(1 observation deleted)

. drop if state_name=="West"
(1 observation deleted)

. drop if state_name=="Puerto Rico"
(1 observation deleted)

. 
. destring pop*, replace ignore(",")
pop2010: byte , removed; replaced as long
(6 missing values generated)
pop2011: byte , removed; replaced as long
(6 missing values generated)
pop2012: byte , removed; replaced as long
(6 missing values generated)
pop2013: byte , removed; replaced as long
(6 missing values generated)
pop2014: byte , removed; replaced as long
(6 missing values generated)
pop2015: byte , removed; replaced as long
(6 missing values generated)
pop2016: byte , removed; replaced as long
(6 missing values generated)
pop2017: byte , removed; replaced as long
(6 missing values generated)
pop2018: byte , removed; replaced as long
(6 missing values generated)

. 
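. *(Old) Extrapolation to 2017/2018 -- no longer needed now that actual data are available (see Updates, 4/30/18).
. *Kept commented out for reference; revisit only if the series must extend beyond the latest available data.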
. *gen pop2017=(pop2016/pop2015)*pop2016
. *gen pop2018=(pop2017/pop2016)*pop2017
. 
. reshape long pop, i(state_name) j(year)
(j = 2010 2011 2012 2013 2014 2015 2016 2017 2018)

Data                               Wide   ->   Long
-----------------------------------------------------------------------------
Number of observations               57   ->   513         
Number of variables                  11   ->   4           
j variable (9 values)                     ->   year
xij variables:
            pop2010 pop2011 ... pop2018   ->   pop
-----------------------------------------------------------------------------

. 
. keep if inlist(year, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019)
(0 observations deleted)

. tab state_name if pop==.

                             state_name |      Freq.     Percent        Cum.
----------------------------------------+-----------------------------------
Note: The estimates are based on the .. |          9       20.00       20.00
            Release Date: December 2019 |          9       20.00       40.00
Source: US. Census Bureau, Population.. |          9       20.00       60.00
                    Suggested Citation: |          9       20.00       80.00
Table 1 Annual Estimates of the Resid.. |          9       20.00      100.00
----------------------------------------+-----------------------------------
                                  Total |         45      100.00

. drop if pop==.
(54 observations deleted)

. *destring pop, replace ignore(",")
. tempfile state_pop_2010s

. save `state_pop_2010s'
file /tmp/St2868349.000005 saved as .dta format

. 
. use `state_pop'

. drop if year>=2010
(255 observations deleted)

. append using `state_pop_2010s'

. 
. 
. replace state=strupper(state_name) if state==""
(459 real changes made)

. sort state fipsst

. by state: replace fipsst=fipsst[_N] if fipsst==""
(459 real changes made)

. assert fipsst!=""

. drop state_name

. destring fipsst, replace
fipsst: all characters numeric; replaced as byte

. ren state state_name

. replace state_name=strproper(state_name)
(2,499 real changes made)

. replace state_name="District of Columbia" if state_name=="District Of Columbia"
(49 real changes made)

. isid state_name year

. 
. save `prepdata'/`dofile'.dta, replace
(file /accounts/projects/jr_ra/GRscarring/erratum/scratch/statepop.dta not found)
file /accounts/projects/jr_ra/GRscarring/erratum/scratch/statepop.dta saved

. if `doasproject'==1 project, creates(`prepdata'/`dofile'.dta)
project GRscar_erratum > do-file creates: "/scratch/public/jr_ra/GRscarring2024/erratum/scratch/statepop.dta" filesig(3467904037:115
> 254)

. 
. ***************************************************************************************************************
. ***************************************************************************************************************
. 
end of do-file
      name:  plog_16
       log:  /accounts/projects/jr_ra/GRscarring/erratum/programs/prepare/statepop.log
  log type:  text
 closed on:  27 Nov 2024, 13:52:52
------------------------------------------------------------------------------------------------------------------------------------
